---
title: "Replication file for The Realities Facing Graduate Students: Before, During, and After the 2020 COVID-19 Pandemic"
author: "Pashayan, Kehlenbach, Ye, Mueller, and Willis"
output: html_document
date: "`r Sys.Date()`"
---

# Setup and Load Datasets 
```{r setup, include=FALSE}
# Set the knitr default chunk option `echo = TRUE`; chunks that pass their own
# `echo` value in the chunk header use that value instead.
knitr::opts_chunk$set(echo = TRUE)
```

```{r, echo=FALSE, message=FALSE, warning=FALSE, results='asis'}
# Load packages
library(dplyr)
library(wordcloud)
library(RColorBrewer)
library(wordcloud2)
library(tm)
library(ggplot2)
library(forcats)

# Folder that has the datasets. Edit this one path to run the replication on
# another machine. The files are read through file.path(data_dir, ...) rather
# than after a setwd() call, so the working directory is left untouched.
data_dir <- "C:/Users/user/Dropbox/Publications/Published/APSA Grad Student Article"
if (!dir.exists(data_dir)) {
  stop(
    "Data folder not found: ", data_dir,
    ". Set `data_dir` to the folder that has the datasets.",
    call. = FALSE
  )
}

# Upload data
# 2018 survey
apsa2018 <- read.csv(
  file.path(data_dir, "2018 Job Market Experiences Survey_Cleaned and Anonymized Dataset.csv")
)

# 2020 survey
apsa2020 <- read.csv(
  file.path(data_dir, "2020_Grad_Student_Climate_Survey_rawdata.csv")
)

# 2022 survey
apsa2022 <- read.csv(
  file.path(data_dir, "2021-22 APSA Member Survey_6.22.22_gradstudents.csv")
)

# Clean the data by removing the first row of each file, which contains the
# survey questions rather than a response
apsa2018 <- apsa2018[-1, ]
apsa2020 <- apsa2020[-1, ]
apsa2022 <- apsa2022[-1, ]
```

# Replications of Tables, Figures, and Descriptive Statistics
```{r, echo=FALSE, message=FALSE, warning=FALSE, results='asis'}
# Table 1 (p. 2): sample sizes

# The 2018 survey contains both graduate students and post graduates (it asks
# about job market experiences), so it is summarised without subsetting.
summary(apsa2018$Dummy.ID)
# sample is 245 observations

# 2020 survey: keep only rows whose Q1 answer identifies a graduate student.
# which() drops rows where the comparison is NA, as subset() would.
apsa2020gr <- apsa2020[
  which(apsa2020[["Q1"]] == "Yes, I am a political science graduate student."),
  ,
  drop = FALSE
]
summary(apsa2020gr$ResponseId)
# sample is 317 observations

# 2022 survey: keep only rows whose Q78 answer is "Graduate Student".
apsa2022gr <- apsa2022[
  which(apsa2022[["Q78"]] == "Graduate Student"),
  ,
  drop = FALSE
]
summary(apsa2022gr$ResponseId)
# sample is 277 observations
```

# Statistics on p. 2
```{r, echo=FALSE, message=FALSE, warning=FALSE, results='asis'}
# 2018 respondent distribution of Q9 "Do you have a PhD?"
# Percentages use every 2018 respondent as the denominator.
Q9 <- table(apsa2018$Q9)
Q9/length(apsa2018$Q9)*100

# 2022 respondent distribution of Q84 "How long have you been a member of APSA?"
# (graduate-student subset only)
Q84 <- table(apsa2022gr$Q84)
Q84/length(apsa2022gr$Q84)*100

# 2022 respondent distribution of Q143 "Do you attend the APSA Annual Meeting
# (virtual and/ or in-person)?" (graduate-student subset only)
Q143 <- table(apsa2022gr$Q143)
Q143/length(apsa2022gr$Q143)*100

#Distribution of percentage of respondents for Q143
#I attend every year. I attend some years. 
#14.80144             55.95668 

# Sum of the two "attend" categories recorded above
attending <- 14.80144 + 55.95668 
attending #percentage of respondents who attend APSA's annual conference regularly

# 2022 respondent distribution of reasons for maintaining APSA membership
# (Q85 Why are you a member of APSA? Select all that apply. - Selected Choice)
# The answers are pooled from Q85 and columns X through X.7. Every column is
# taken from the graduate-student subset `apsa2022gr`; the first extra column
# was previously read from the unfiltered `apsa2022`, which mixed in rows
# outside the subset that the denominator below is based on.
apsa2022_Q85 <- data.frame(c(apsa2022gr$Q85, apsa2022gr$X, apsa2022gr$X.1, apsa2022gr$X.2, apsa2022gr$X.3, apsa2022gr$X.4, apsa2022gr$X.5, apsa2022gr$X.6, apsa2022gr$X.7))
# Treat empty and single-space cells as missing, then drop them
apsa2022_Q85[apsa2022_Q85 == "" | apsa2022_Q85 == " "] <- NA 
apsa2022_Q85 <- na.omit(apsa2022_Q85)

# Share of graduate-student respondents selecting each reason
Q85 <- table(apsa2022_Q85)
Q85/length(apsa2022gr$Q85)*100
```

# Figure 1 (pp. 2-3), word cloud from 2022 survey
```{r, echo=FALSE, message=FALSE, warning=FALSE, results='asis'}
# Q143.1 - "Please feel free to provide an example of one or any of the problems you are facing as a graduate student."

# Create a vector containing only the text
text <- apsa2022gr$Q143.1

# Create a corpus, lower-case it, and remove stopwords (e.g. "the", "and", etc.)
docs <- Corpus(VectorSource(text))
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))

# Count how often each term occurs across all responses, most frequent first
dtm <- TermDocumentMatrix(docs) 
matrix <- as.matrix(dtm) 
words <- sort(rowSums(matrix),decreasing=TRUE) 
df <- data.frame(word = names(words),freq=words)

# Create wordcloud.
# wordcloud() positions and rotates words using random draws, so the seed is
# fixed to make every knit produce the same layout. The seed value itself is
# arbitrary; the layout need not match the published figure exactly.
set.seed(2022)
wordcloud(words = df$word, freq = df$freq, min.freq = 1,           
  max.words=50, random.order=FALSE, rot.per=0.35,            
  colors=brewer.pal(8, "Dark2"))
```

# Table 2 (p. 3) Percentages: Graduate Student Respondents’ Reported Race or Ethnicity Across Surveys
```{r, echo=FALSE, message=FALSE, warning=FALSE, results='asis'}
# 2018 distribution of respondents' reported racial and ethnic identity
# (Q52 What racial or ethnic group best describes you? (Check all that apply) - Selected Choice)

# Clean data and subset Q52 options.
# Columns X.13, X.14 and X.15 are pooled with Q52 below; their empty or
# single-space cells are dropped first. Blank cells in Q52 itself are kept and
# show up as the "(Blank)" category.

apsa2018_Q52.2 <- data.frame(apsa2018$X.13)
apsa2018_Q52.2[apsa2018_Q52.2 == "" | apsa2018_Q52.2 == " "] <- NA 
apsa2018_Q52.2 <- na.omit(apsa2018_Q52.2)

apsa2018_Q52.3 <- data.frame(apsa2018$X.14)
apsa2018_Q52.3[apsa2018_Q52.3 == "" | apsa2018_Q52.3 == " "] <- NA 
apsa2018_Q52.3 <- na.omit(apsa2018_Q52.3)

apsa2018_Q52.4 <- data.frame(apsa2018$X.15)
apsa2018_Q52.4[apsa2018_Q52.4 == "" | apsa2018_Q52.4 == " "] <- NA 
apsa2018_Q52.4 <- na.omit(apsa2018_Q52.4)

apsa2018_Q52 <- data.frame(c(apsa2018$Q52, apsa2018_Q52.2$apsa2018.X.13, apsa2018_Q52.3$apsa2018.X.14, apsa2018_Q52.4$apsa2018.X.15))

# Percentage of 2018 respondents selecting each category
Q52 <- table(apsa2018_Q52)
Q52/length(apsa2018$Q52)*100

#2018 percentage distribution for Q52

#(Blank)
#                                 13.0612245 
#Black, Afro-Caribbean,  or African American 
#                                  4.0816327 
#               East Asian or Asian American 
#                                  3.6734694 
#              Latino/a or Hispanic American 
#                                 10.2040816 
#            Middle Eastern or Arab American 
#                                  2.8571429 
#          Native American or Alaskan Native 
#                                  0.4081633 
#        Non-Hispanic White or Euro-American 
#                                 62.0408163
#                     Other (please specify) 
#                                  3.6734694 
#                       Prefer not to answer 
#                                  4.8979592 
#             South Asian or Indian American 
#                                  2.4489796 

#Blank or prefer not to answer
no_ans <- 4.8979592 + 13.0612245 
no_ans

#Asian (other than Middle Eastern)
# South Asian or Indian American (2.4489796) plus East Asian or Asian American
# (3.6734694). The second term was previously 4.0816327, which is the
# "Black, Afro-Caribbean, or African American" share in the table above.
Asia <- 2.4489796 + 3.6734694
Asia

# 2020-2021 survey: respondents' reported racial and ethnic identity
# (Q36 With which racial or ethnic group do you personally and most closely identify? (Mark all that apply.) - Selected Choice)

# Columns X and X.1 are pooled with Q36 below. Empty and single-space cells
# in those two columns are marked missing and dropped first.
apsa2020_Q36.2 <- data.frame(apsa2020gr.X = apsa2020gr$X)
apsa2020_Q36.2$apsa2020gr.X[apsa2020_Q36.2$apsa2020gr.X %in% c("", " ")] <- NA
apsa2020_Q36.2 <- na.omit(apsa2020_Q36.2)

apsa2020_Q36.3 <- data.frame(apsa2020gr.X.1 = apsa2020gr$X.1)
apsa2020_Q36.3$apsa2020gr.X.1[apsa2020_Q36.3$apsa2020gr.X.1 %in% c("", " ")] <- NA
apsa2020_Q36.3 <- na.omit(apsa2020_Q36.3)

# Pool all selections into one column (blank Q36 cells stay in as "(Blank)")
apsa2020_Q36 <- data.frame(
  c(apsa2020gr$Q36, apsa2020_Q36.2$apsa2020gr.X, apsa2020_Q36.3$apsa2020gr.X.1)
)

# Percentage of 2020 graduate-student respondents per category
Q36 <- table(apsa2020_Q36)
Q36 / nrow(apsa2020gr) * 100

# Recorded output: 2020-2021 percentage distribution for Q36
#                         (Blank)         Asian (other than Middle Eastern). 
#                          8.5173502                           8.5173502 
#          Black or African descent.              Hispanic or Latino/a/x 
#                          3.4700315                          11.0410095 
#   Middle Eastern or North African. Native American or American Indian. 
#                          3.1545741                           0.6309148 
#             Other (explain below).                   Pacific Islander. 
#                          3.1545741                           0.3154574 
#             Prefer not to respond.          White or European descent. 
#                          4.4164038                          63.0914826 

# "Prefer not to respond." plus "(Blank)"
no_ans20 <- 4.4164038 + 8.5173502
no_ans20

# 2022 survey: respondents' reported racial and ethnic identity
# (Q77 What racial or ethnic group best describes you? Select all that apply. - Selected Choice)

# Columns X.8, X.9 and X.10 are pooled with Q77 below. Empty and single-space
# cells in those columns are marked missing and dropped first.
apsa2022_Q77.2 <- data.frame(apsa2022gr.X.8 = apsa2022gr$X.8)
apsa2022_Q77.2$apsa2022gr.X.8[apsa2022_Q77.2$apsa2022gr.X.8 %in% c("", " ")] <- NA
apsa2022_Q77.2 <- na.omit(apsa2022_Q77.2)

apsa2022_Q77.3 <- data.frame(apsa2022gr.X.9 = apsa2022gr$X.9)
apsa2022_Q77.3$apsa2022gr.X.9[apsa2022_Q77.3$apsa2022gr.X.9 %in% c("", " ")] <- NA
apsa2022_Q77.3 <- na.omit(apsa2022_Q77.3)

apsa2022_Q77.4 <- data.frame(apsa2022gr.X.10 = apsa2022gr$X.10)
apsa2022_Q77.4$apsa2022gr.X.10[apsa2022_Q77.4$apsa2022gr.X.10 %in% c("", " ")] <- NA
apsa2022_Q77.4 <- na.omit(apsa2022_Q77.4)

# Pool all selections into one column (blank Q77 cells stay in as "(Blanks)")
apsa2022_Q77 <- data.frame(
  c(apsa2022gr$Q77, apsa2022_Q77.2$apsa2022gr.X.8, apsa2022_Q77.3$apsa2022gr.X.9, apsa2022_Q77.4$apsa2022gr.X.10)
)

# Percentage of 2022 graduate-student respondents per category
Q77 <- table(apsa2022_Q77)
Q77 / nrow(apsa2022gr) * 100

# Recorded output: 2022 percentage distribution for Q77
#(Blanks)   
#                                 1.8050542 
#Black, Afro-Caribbean, or African American 
#                                 6.4981949 
#              East Asian or Asian American 
#                                10.8303249 
#             Latino/a or Hispanic American 
#                                 7.5812274 
#           Middle Eastern or Arab American 
#                                 2.5270758 
#         Native American or Alaskan Native 
#                                 1.0830325 
# Native Hawaiian or Other Pacific Islander 
#                                 0.3610108 
#       Non-Hispanic White or Euro-American 
#                                56.6787004 
#                    Other (please specify) 
#                                 4.6931408 
#                      Prefer not to answer 
#                                 9.3862816 
#            South Asian or Indian American 
#                                 5.0541516 

# "(Blanks)" plus "Prefer not to answer"
no_ans22 <- 1.8050542 + 9.3862816
no_ans22

# Please note that the table was created in Microsoft Word using these statistics
```

# Statistics on p. 4
```{r, echo=FALSE, message=FALSE, warning=FALSE, results='asis'}
# 2022 rankings of top concerns for grad students
# (Q142.1 Please rank the top issues you are facing as a graduate student in political science- Job prospects, academic or non-academic)

# Percentage of graduate-student respondents giving each rank to job prospects
Q142 <- table(apsa2022gr$Q142_1)
Q142 / nrow(apsa2022gr) * 100

# 2020-2021 responses about how well programs prepare students for jobs

# Q31 How well does your program support and prepare graduate students for academic careers?

# Mark empty and single-space answers as missing, then drop them
apsa2020_Q31 <- data.frame(apsa2020.Q31 = apsa2020$Q31)
apsa2020_Q31$apsa2020.Q31[apsa2020_Q31$apsa2020.Q31 %in% c("", " ")] <- NA
apsa2020_Q31 <- na.omit(apsa2020_Q31)

# Percentages among those who answered Q31 (academic careers)
Q31 <- table(apsa2020_Q31)
Q31 / nrow(apsa2020_Q31) * 100

# Recorded output for the two favourable categories
#Somewhat well. Very well.
#48.620690      18.965517 

acjobs <- 48.620690 + 18.965517
acjobs # Percentage of good preparation for academic careers

# Q30 How well does your program support and prepare graduate students for non-academic careers? (Examples: policy analyst, political consultant, etc.)

# Mark empty and single-space answers as missing, then drop them
apsa2020_Q30 <- data.frame(apsa2020.Q30 = apsa2020$Q30)
apsa2020_Q30$apsa2020.Q30[apsa2020_Q30$apsa2020.Q30 %in% c("", " ")] <- NA
apsa2020_Q30 <- na.omit(apsa2020_Q30)

# Percentages among those who answered Q30 (non-academic careers)
Q30 <- table(apsa2020_Q30)
Q30 / nrow(apsa2020_Q30) * 100

# Recorded output for the two unfavourable categories
#Somewhat poorly.     Very poorly. 
#28.719723            30.103806 

nacjobs <- 28.719723 + 30.103806
nacjobs # Percentage of poor preparation for non-academic careers
                   
# 2018 respondent distribution of jobs offered, accepted, and applied for

# Q12 Thinking about all of the jobs you applied for during the 2017-2018 academic year, approximately how many positions did you apply for?

# Keep only Q11 and Q12, mark blank Q12 answers as missing, and drop rows
# with a missing value
apsa2018_Q1112 <- apsa2018[, c("Q11", "Q12"), drop = FALSE]
apsa2018_Q1112$Q12[apsa2018_Q1112$Q12 %in% c("", " ")] <- NA
apsa2018_Q1112 <- na.omit(apsa2018_Q1112)

# Percentage distribution of number of jobs applied for in 2017-2018.
# This table is built from the full 2018 data, so the denominator is every
# 2018 respondent.
Q12 <- table(apsa2018$Q12)
Q12 / nrow(apsa2018) * 100

# Recorded output: respondents applying for more than 25 jobs
#           26-50       51-75       76-100     101 or more jobs
#     24.4897959%  20.8163265%   10.2040816%     4.0816327%

m25 <- 24.4897959 + 20.8163265 + 10.2040816 + 4.0816327
m25 # percentage applying for more than 25 jobs

# Remove the 26-50 band to leave those above 50
m50 <- m25 - 24.4897959
m50 # percentage applying for more than 50 jobs

# Respondents who received a job offer (Q11 Did you apply, receive an offer, and accept an offer for a job (academic, nonacademic, or other) in 2017-2018?)
jobs2018 <- apsa2018_Q1112[which(apsa2018_Q1112$Q11 == "Yes"), , drop = FALSE]

# Number of jobs applied for among respondents with a job offer, as a
# percentage of all rows kept in apsa2018_Q1112
Q12j <- table(jobs2018$Q12)
Q12j / nrow(apsa2018_Q1112) * 100

# Respondents who did not receive a job offer
njobs2018 <- apsa2018_Q1112[which(apsa2018_Q1112$Q11 == "No"), , drop = FALSE]

Q12n <- table(njobs2018$Q12)
Q12n / nrow(apsa2018_Q1112) * 100

# Recorded output: no offer and more than 50 applications
#101 or more jobs    51-75           76-100          
#       0.8230453%   4.9382716%      3.7037037%     

n50 <- 0.8230453 + 4.9382716 + 3.7037037
n50 # percentage of respondents who did not receive an offer and applied for more than 50 jobs

# Percentages of those who received a job offer (all 2018 respondents)
Q11 <- table(apsa2018$Q11)
Q11 / nrow(apsa2018) * 100

# 2018 distribution of the types of jobs respondents applied for (Q16 Which of the following job categories describe the position(s) you applied for during the 2017-2018 academic year? Select all that apply.)

# Pool Q16 with columns X through X.3, then drop empty and single-space cells
apsa2018_Q16 <- data.frame(
  c(apsa2018$Q16, apsa2018$X, apsa2018$X.1, apsa2018$X.2, apsa2018$X.3)
)
apsa2018_Q16[[1]][apsa2018_Q16[[1]] %in% c("", " ")] <- NA
apsa2018_Q16 <- na.omit(apsa2018_Q16)

# Selections per category as a percentage of all 2018 respondents
Q16 <- table(apsa2018_Q16)
Q16 / nrow(apsa2018) * 100
```

# Figure 2 (p. 4) 2020-2021 Q30: How well does your program support and prepare graduate students for non-academic careers? (Examples: policy analyst, political consultant, etc.)
```{r, echo=FALSE, message=FALSE, warning=FALSE, results='asis'}
# Bar chart of Q30 answers. fct_infreq() orders the answers by frequency and
# fct_rev() reverses that order along the x axis.
fig2 <- ggplot(apsa2020_Q30, aes(x = fct_rev(fct_infreq(apsa2020.Q30))))

# The y axis is limited to 0-100 responses
fig2 +
  geom_bar(fill = "chartreuse3") +
  labs(
    x = "How well does your program support and prepare graduate students for non-academic careers?",
    y = "Count"
  ) +
  ylim(0, 100)
```

# Statistics on p. 5
```{r, echo=FALSE, message=FALSE, warning=FALSE, results='asis'}
# 2018 Q18 What were the most important factors to you in searching for a position in 2017-2018? Select all that apply.

# Pool Q18 with columns X.4 through X.12, then drop empty and single-space cells
apsa2018_Q18 <- data.frame(
  c(apsa2018$Q18, apsa2018$X.4, apsa2018$X.5, apsa2018$X.6, apsa2018$X.7, apsa2018$X.8, apsa2018$X.9, apsa2018$X.10, apsa2018$X.11, apsa2018$X.12)
)
apsa2018_Q18[[1]][apsa2018_Q18[[1]] %in% c("", " ")] <- NA
apsa2018_Q18 <- na.omit(apsa2018_Q18)

# Selections per factor as a percentage of all 2018 respondents
Q18 <- table(apsa2018_Q18)
Q18 / nrow(apsa2018) * 100

# 2018 Q19 Please rank these factors from most to least important by dragging and dropping the items below.

# Convert the eleven ranking columns Q19_1 ... Q19_11 to numeric
apsa2018[paste0("Q19_", 1:11)] <- lapply(
  apsa2018[paste0("Q19_", 1:11)],
  as.numeric
)

# Replace missing ranks with 0 in the five columns analysed below
apsa2018[c("Q19_1", "Q19_2", "Q19_3", "Q19_4", "Q19_6")] <- lapply(
  apsa2018[c("Q19_1", "Q19_2", "Q19_3", "Q19_4", "Q19_6")],
  function(rank) replace(rank, is.na(rank), 0)
)

# Keep respondents who ranked at least one of the five factors: a row whose
# five ranks sum to 0 had all of them missing
apsa2018$Q19 <- with(apsa2018, Q19_1 + Q19_2 + Q19_3 + Q19_4 + Q19_6)
apsa2018_Q19 <- apsa2018[which(apsa2018$Q19 != 0), , drop = FALSE]

# Percentages for Q19 rankings; the denominator is every 2018 respondent
# Salary
Q19.1 <- table(apsa2018_Q19$Q19_1)
Q19.1 / nrow(apsa2018) * 100

# Location
Q19.2 <- table(apsa2018_Q19$Q19_2)
Q19.2 / nrow(apsa2018) * 100

# Institutional reputation
Q19.3 <- table(apsa2018_Q19$Q19_3)
Q19.3 / nrow(apsa2018) * 100

# Teaching load
Q19.4 <- table(apsa2018_Q19$Q19_4)
Q19.4 / nrow(apsa2018) * 100

# Professional fit
Q19.6 <- table(apsa2018_Q19$Q19_6)
Q19.6 / nrow(apsa2018) * 100

# Percentages for different job search resources (Q40 Which of the following resources did you use when searching for jobs in 2017-2018? Select all that apply with respect to the different types of positions you applied for.)
# Every percentage in this section uses all 2018 respondents as the denominator.

# Inside Higher Education Careers
Q40_1 <- table(apsa2018$Q40_1)
Q40_1 / nrow(apsa2018) * 100

# Recorded output: respondents who used Inside Higher Education Careers
#Academic Positions 
#30.6122449% 
#Academic Positions,Nonacademic Positions 
#1.2244898% 
#Nonacademic Positions                                
#0.4081633% 

IHEC <- 30.6122449 + 1.2244898 + 0.4081633
IHEC # Percentage of respondents who used Inside Higher Education Careers

# Vitae (Chronicles of Higher Education)
Q40_2 <- table(apsa2018$Q40_2)
Q40_2 / nrow(apsa2018) * 100

# Recorded output: respondents who used Vitae (Chronicles of Higher Education)
#Academic Positions 
#37.142857%
#Academic Positions,Nonacademic Positions 
#2.857143%

Vitae <- 37.142857 + 2.857143
Vitae # Percentage of respondents who used Vitae (Chronicles of Higher Education)

# APSA eJobs
Q40_5 <- table(apsa2018$Q40_5)
Q40_5 / nrow(apsa2018) * 100

# Recorded output: respondents who used APSA eJobs
#Academic Positions 
#72.653061%
#Academic Positions,Nonacademic Positions 
#3.673469% 
#Nonacademic Positions                                
#1.224490%

eJobs <- 72.653061 + 3.673469 + 1.224490
eJobs # Percentage of respondents who used APSA eJobs

# Personal networks/word of mouth
Q40_13 <- table(apsa2018$Q40_13)
Q40_13 / nrow(apsa2018) * 100

# Recorded output: respondents who used personal networks/word of mouth
#Academic Positions 
#40.000000%
#Academic Positions,Nonacademic Positions 
#8.571429%
#Nonacademic Positions                                
#3.265306%

networks <- 40 + 8.571429 + 3.265306
networks # Percentage of respondents who used personal networks/word of mouth

# Percentages of levels of satisfaction with various sources of job market advice (Q40 #2 Using the tick boxes and dropdown options below, please indicate where you got advice on the job market.)

# Academic advisor
Q40.2_1 <- table(apsa2018$Q40.2_1)
Q40.2_1 / nrow(apsa2018) * 100
# Very useful advice- 30.612245%

# Other professors at home institution
Q40.2_2 <- table(apsa2018$Q40.2_2)
Q40.2_2 / nrow(apsa2018) * 100
# Very useful advice- 28.571429%

# Personal networks
Q40.2_11 <- table(apsa2018$Q40.2_11)
Q40.2_11 / nrow(apsa2018) * 100
# Very useful advice- 24.081633%

# 2020-2021 agreement with the statement on respect for different political opinions (Q18 The political science profession is tolerant and respectful of differences in political opinion.)

# Mark empty and single-space answers as missing, then drop them
apsa2020_Q18 <- data.frame(apsa2020.Q18 = apsa2020$Q18)
apsa2020_Q18$apsa2020.Q18[apsa2020_Q18$apsa2020.Q18 %in% c("", " ")] <- NA
apsa2020_Q18 <- na.omit(apsa2020_Q18)

# Percentages among those who answered Q18
Q18_20 <- table(apsa2020_Q18)
Q18_20 / nrow(apsa2020_Q18) * 100

# Recorded output
#Agree.     Disagree.   Neutral or unsure. 
#27.181208  23.489933   32.214765 
#Strongly agree. Strongly disagree. 
#5.369128        11.744966

# Agreement with Q18: "Agree." plus "Strongly agree."
agree_pol <- 27.181208 + 5.369128
agree_pol

# Disagreement with Q18: "Disagree." plus "Strongly disagree."
disagree_pol <- 23.489933 + 11.744966
disagree_pol

# 2020-2021 agreement with the statement on respect for racial and ethnic differences (Q13: The political science profession is tolerant and respectful of racial and ethnic differences.)

# Mark empty and single-space answers as missing, then drop them
apsa2020_Q13 <- data.frame(apsa2020.Q13 = apsa2020$Q13)
apsa2020_Q13$apsa2020.Q13[apsa2020_Q13$apsa2020.Q13 %in% c("", " ")] <- NA
apsa2020_Q13 <- na.omit(apsa2020_Q13)

# Percentages among those who answered Q13
Q13 <- table(apsa2020_Q13)
Q13 / nrow(apsa2020_Q13) * 100

# Recorded output
#Agree.     Disagree.   Neutral or unsure. 
#22.33333   35.33333    20.66667 
#Strongly agree. Strongly disagree. 
#11.33333        10.33333 

# Disagreement with Q13: "Disagree." plus "Strongly disagree."
disagree_eth <- 35.33333 + 10.33333
disagree_eth

# 2020-2021 agreement with the statement on respect for sexual and gender differences (Q14: The political science profession is tolerant and respectful of sex and gender differences.)

# Mark empty and single-space answers as missing, then drop them
apsa2020_Q14 <- data.frame(apsa2020.Q14 = apsa2020$Q14)
apsa2020_Q14$apsa2020.Q14[apsa2020_Q14$apsa2020.Q14 %in% c("", " ")] <- NA
apsa2020_Q14 <- na.omit(apsa2020_Q14)

# Percentages among those who answered Q14
Q14 <- table(apsa2020_Q14)
Q14 / nrow(apsa2020_Q14) * 100

# Recorded output
#Agree.     Disagree.   Neutral or unsure. 
#24.242424  33.670034   21.885522 
#Strongly agree. Strongly disagree. 
#9.090909        11.111111 

# Disagreement with Q14: "Disagree." plus "Strongly disagree."
disagree_gen <- 33.670034 + 11.111111
disagree_gen

# 2022 answers on departmental inclusion (Q155: Does your department/unit engage in specific activities to create an inclusive environment?)

# Percentage of graduate-student respondents per answer
Q155 <- table(apsa2022gr$Q155)
Q155 / nrow(apsa2022gr) * 100
```

# Figure 3 (p. 5) 2020–2021 Q2: Overall, How Do You Feel About Your Own Financial Security?
```{r, echo=FALSE, message=FALSE, warning=FALSE, results='asis'}
# Q2 answers with empty and single-space cells marked missing and dropped
apsa2020_Q2 <- data.frame(apsa2020.Q2 = apsa2020$Q2)
apsa2020_Q2$apsa2020.Q2[apsa2020_Q2$apsa2020.Q2 %in% c("", " ")] <- NA
apsa2020_Q2 <- na.omit(apsa2020_Q2)

# Horizontal bar chart of the answers, ordered by frequency via fct_infreq();
# the count axis is limited to 0-100
fig3 <- ggplot(apsa2020_Q2, aes(x = fct_infreq(apsa2020.Q2)))
fig3 +
  geom_bar(fill = "blue2") +
  coord_flip() +
  ylim(0, 100) +
  labs(y = "Count", x = "", caption = "Overall, how do you feel about your
own financial security?")

# Note that the order of the bars here is ascending by value; the order in the manuscript is the order of the Likert scale values
```

# Statistics on p. 6
```{r, echo=FALSE, message=FALSE, warning=FALSE, results='asis'}
# 2020–2021 Q2: Overall, How Do You Feel About Your Own Financial Security?
# Uses apsa2020_Q2, the cleaned Q2 answers prepared for Figure 3.

Q2 <- table(apsa2020_Q2)
Q2 / nrow(apsa2020_Q2) * 100

# Recorded output: distribution of respondent percentages for Q2
#I feel somewhat insecure.   I feel somewhat secure. 
#                23.56688                  21.01911 
#I feel very insecure.       I feel very secure. 
#                16.24204                  13.69427 
#I have mixed feelings. 
#                25.47771

q2sec <- 21.01911 + 13.69427
q2sec # percentage responding somewhat or very secure

q2insec <- 23.56688 + 16.24204
q2insec # percentage responding somewhat or very insecure

# Percentages of respondents with side gigs (2020-2021 Q8: As a graduate student, have you done any off-campus “side jobs” or “gigs” in order to support yourself or your family financially? (Examples: cashier, waiter, ride-share driver, etc.))

# Mark empty and single-space answers as missing, then drop them
apsa2020_Q8 <- data.frame(apsa2020.Q8 = apsa2020$Q8)
apsa2020_Q8$apsa2020.Q8[apsa2020_Q8$apsa2020.Q8 %in% c("", " ")] <- NA
apsa2020_Q8 <- na.omit(apsa2020_Q8)

# Percentages among those who answered Q8
Q8 <- table(apsa2020_Q8)
Q8 / nrow(apsa2020_Q8) * 100

# Recorded output: distribution of respondent percentages for Q8
#No, and I do not see myself doing so. 
#39.802632 
#No, but I might do so in the future. 
#21.710526 
#Yes, but only during a summer or winter break. 
#9.210526 
#Yes, during the academic year. 
#29.276316 

# Sum of the two "Yes" categories
sidegig <- 9.210526 + 29.276316
sidegig # percentage of respondents who did side gigs in grad school

# Concerns about student loan debt (2020-2021 Q9: Do you have outstanding student loan debt from before graduate school, and if so, how concerned are you about paying the debt?)

# Mark empty and single-space answers as missing, then drop them
apsa2020_Q9 <- data.frame(apsa2020.Q9 = apsa2020$Q9)
apsa2020_Q9$apsa2020.Q9[apsa2020_Q9$apsa2020.Q9 %in% c("", " ")] <- NA
apsa2020_Q9 <- na.omit(apsa2020_Q9)

# Percentages among those who answered Q9 (this reuses the name Q9, which
# held the 2018 Q9 table earlier in the file)
Q9 <- table(apsa2020_Q9)
Q9 / nrow(apsa2020_Q9) * 100

# Recorded output: distribution of respondent percentages for Q9
#No, I do not have any such debt. 
#                       54.276316 
#Yes, and I am somewhat concerned. 
#                       16.447368 
#Yes, and I am very concerned. 
#                       21.052632 
#Yes, but it causes me little or no concern. 
#                       8.223684 

# Availability of health insurance for graduate students (2020-2021 Q4: Do you currently have health insurance through your institution, and if so, how is the payment of your premiums structured?)

# Mark empty and single-space answers as missing, then drop them
apsa2020_Q4 <- data.frame(apsa2020.Q4 = apsa2020$Q4)
apsa2020_Q4$apsa2020.Q4[apsa2020_Q4$apsa2020.Q4 %in% c("", " ")] <- NA
apsa2020_Q4 <- na.omit(apsa2020_Q4)

# Percentages among those who answered Q4
Q4 <- table(apsa2020_Q4)
Q4 / nrow(apsa2020_Q4) * 100

# Recorded output: distribution of respondent percentages for Q4
# No, but I have health insurance through a family member. 
# 4.901961 
# No, but I have obtained health insurance independently. 
# 6.209150 
# No, I do not have health insurance coverage. 
# 2.287582 
# Yes, I pay the full cost of my premiums (e.g., through a payroll deduction). 
# 8.823529 
# Yes, my institution covers the full cost of my premiums. 
# 42.156863 
# Yes, my institution subsidizes or shares the cost of my premiums. 
# 35.620915

# Sum of the two categories in which the institution covers or shares the
# premium cost
insurance <- 42.156863 + 35.620915
insurance # percentage of respondents with health insurance through their institution

# Withdrawing from program due to lack of finances (2020-2021 Q12: For financial reasons, have you ever seriously considered withdrawing from your current program?)

# Mark empty and single-space answers as missing, then drop them
apsa2020_Q12 <- data.frame(apsa2020.Q12 = apsa2020$Q12)
apsa2020_Q12$apsa2020.Q12[apsa2020_Q12$apsa2020.Q12 %in% c("", " ")] <- NA
apsa2020_Q12 <- na.omit(apsa2020_Q12)

# Percentages among those who answered Q12
Q12_20 <- table(apsa2020_Q12)
Q12_20 / nrow(apsa2020_Q12) * 100

# Recorded output: distribution of respondent percentages for Q12
#No, I have never considered leaving. 
#                           59.07591 
#Yes, I have often considered leaving. 
#                           12.54125 
#Yes, I have sometimes considered leaving. 
#                           28.38284 

# Sum of the two "Yes" categories
leave <- 12.54125 + 28.38284
leave # percentage of respondents who ever considered leaving their program for financial reasons

# Financial security and family planning (2020-2021 Q11: At this time in your life, as a graduate student, would you feel financially secure if you were to have a(nother) child?)

# Mark empty and single-space answers as missing, then drop them
apsa2020_Q11 <- data.frame(apsa2020.Q11 = apsa2020$Q11)
apsa2020_Q11$apsa2020.Q11[apsa2020_Q11$apsa2020.Q11 %in% c("", " ")] <- NA
apsa2020_Q11 <- na.omit(apsa2020_Q11)

# Percentages among those who answered Q11
Q11_20 <- table(apsa2020_Q11)
Q11_20 / nrow(apsa2020_Q11) * 100

# 2022 distributions of ages of children living at home.
# Each percentage below uses all 2022 graduate-student respondents as the
# denominator.

# Q222.1 Of your children living at home how many are in each of the following age groups? - Under 2 years old
Q222_1 <- table(apsa2022gr$Q222_1)
Q222_1 / nrow(apsa2022gr) * 100

# Q222.2 Of your children living at home how many are in each of the following age groups? - From 2 to 4 years old
Q222_2 <- table(apsa2022gr$Q222_2)
Q222_2 / nrow(apsa2022gr) * 100

# Q222.3 Of your children living at home how many are in each of the following age groups? - From 5 to 12 years old
Q222_3 <- table(apsa2022gr$Q222_3)
Q222_3 / nrow(apsa2022gr) * 100

# Recorded output for Q222.3 (number of children: percentage)
#      1          2          3 
#2.5270758  3.6101083  0.7220217 
five_to_twelve <- 2.5270758 + 3.6101083 + 0.7220217
five_to_twelve # total percentage of respondents with children aged 5-12 living at home

# 2022 children living at home (Q171 Do you have children living at home in any of the following age groups?)

# Pool Q171 with columns X.11 and X.12 (blank cells are not removed here)
apsa2022_Q171 <- data.frame(
  c(apsa2022gr$Q171, apsa2022gr$X.11, apsa2022gr$X.12)
)

# Selections per category as a percentage of graduate-student respondents
Q171 <- table(apsa2022_Q171)
Q171 / nrow(apsa2022gr) * 100

# Recorded output: percentage distribution for Q171
#From 13 to 18 years old 
#1.8050542 
#From 2 to 4 years old          From 5 to 12 years old 
#4.3321300                      7.5812274 
#Over 19 years old              Under 2 years old 
#0.3610108                      6.4981949 
#Unsure or prefer not to answer. 
#2.8880866 

# NOTE(review): this total adds every category above, including "Unsure or
# prefer not to answer." (2.8880866). Because answers are pooled from three
# columns, a respondent with children in several age groups may be counted
# more than once. Confirm that this is the intended statistic.
children <- 1.8050542 + 4.3321300 + 7.5812274 + 0.3610108 + 6.4981949 + 2.8880866
children # percentage of respondents with children
```